#' Check that every required package is installed.
#'
#' @param packages Character vector of package names.
#' @return `TRUE` when every name in `packages` is installed (including the
#'   empty-vector case). Otherwise prints the missing package names to the
#'   console and returns `FALSE` invisibly, so the result is always safe to
#'   use in an `if ()` condition.
check_packages <- function(packages) {
  # Scanning the library is slow, so do it once and reuse the result.
  installed <- rownames(installed.packages())
  missing_pkgs <- packages[!(packages %in% installed)]

  if (length(missing_pkgs) == 0) {
    return(TRUE)
  }

  cat(
    "Instalar los siguientes packages antes de ejecutar el presente script\n",
    missing_pkgs,
    "\n"
  )
  invisible(FALSE)
}
# Packages this script attaches with library(). tidyr, tidyverse and dplyr
# are attached further down in the script and were previously not verified
# here, so a missing one was only discovered midway through the run.
packages_needed <- c(
  "ggplot2", "ggrepel", "plotly", "sqldf",
  "lubridate", "htmlwidgets", "RColorBrewer",
  "data.table", "readr",
  "tidyr", "tidyverse", "dplyr"
)
check_packages(packages_needed)
## [1] TRUE
library(ggplot2)
library(ggrepel)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(htmlwidgets)
library(RColorBrewer)
library(grid)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
# Base URL of the JHU CSSE COVID-19 time-series folder on GitHub.
URL <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
url_archivo <- paste0(URL, "time_series_covid19_confirmed_global.csv")

# Download the global confirmed-cases table. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
COVID_19_h <- read.csv(url_archivo, sep = ",", header = TRUE)

# Drop the coordinate and province columns; only the country column and the
# remaining (per-date) columns are kept.
COVID_19_h$Lat <- NULL
COVID_19_h$Long <- NULL
COVID_19_h$Province.State <- NULL

# Rename the country column (data.table::setnames renames by reference).
setnames(COVID_19_h, old = "Country.Region", new = "pais")
library(tidyr)
# Reshape from wide to long: every column except `pais` becomes one
# (fecha, confirmados) row. Selecting by name instead of by position keeps
# this correct regardless of where `pais` sits in the table.
COVID_19 <- COVID_19_h %>%
  pivot_longer(cols = -pais, names_to = "fecha", values_to = "confirmados")

# A country can appear on several rows; add them up per country and date.
# `.groups = "drop_last"` is the default result (still grouped by `pais`),
# stated explicitly so summarise() does not emit its grouping message.
COVID_19 <- COVID_19 %>%
  group_by(pais, fecha) %>%
  summarise(confirmados = sum(confirmados), .groups = "drop_last")

# One row per country holding its largest count across all dates
# (presumably the latest value, if the source series is cumulative).
confirmados_por_pais <- COVID_19 %>%
  group_by(pais) %>%
  summarise(confirmados = max(confirmados))
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.1.0 v dplyr 1.0.5
## v readr 1.4.0 v stringr 1.4.0
## v purrr 0.3.4 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::between() masks data.table::between()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::first() masks data.table::first()
## x data.table::hour() masks lubridate::hour()
## x lubridate::intersect() masks base::intersect()
## x data.table::isoweek() masks lubridate::isoweek()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x data.table::mday() masks lubridate::mday()
## x data.table::minute() masks lubridate::minute()
## x data.table::month() masks lubridate::month()
## x data.table::quarter() masks lubridate::quarter()
## x data.table::second() masks lubridate::second()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::transpose() masks data.table::transpose()
## x lubridate::union() masks base::union()
## x data.table::wday() masks lubridate::wday()
## x data.table::week() masks lubridate::week()
## x data.table::yday() masks lubridate::yday()
## x data.table::year() masks lubridate::year()
library(dplyr)
# Rename countries so their labels match the population table.
# recode() matches the WHOLE name exactly; names not listed are left as-is.
# The previous chain of regex substring replacements would also have
# rewritten any name merely containing one of the patterns (e.g. "US") and
# was not idempotent ("Brunei Darussalam" -> "Brunei Darussalam Darussalam").
confirmados_por_pais <- confirmados_por_pais %>%
  mutate(
    pais = recode(
      as.character(pais),
      "Korea, South" = "South Korea",
      "Congo (Kinshasa)" = "Democratic Republic of the Congo",
      "Taiwan*" = "Taiwan",
      "US" = "United States of America",
      "Brunei" = "Brunei Darussalam",
      "Cote d'Ivoire" = "Costa de Marfil",
      "Holy See" = "Vatican City",
      "Czechia" = "Czech Republic",
      "Diamond Princess" = "crucero Diamond Princess",
      "MS Zaandam" = "crucero MS Zaandam",
      "Timor-Leste" = "East Timor"
    )
  )
library(readr)
# Read the population table: a semicolon-delimited, WINDOWS-1252 encoded
# CSV. `cantidad` is parsed as a number with no grouping mark.
habitantes <- read_delim(
  file = "C:/visualizacion/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.csv",
  delim = ";",
  escape_double = FALSE,
  col_types = cols(cantidad = col_number()),
  locale = locale(grouping_mark = "", encoding = "WINDOWS-1252"),
  trim_ws = TRUE
)
colnames(habitantes)
## [1] "Index" "Variant" "pais" "Notes" "codigo" "Type" "cantidad"

# Keep only country name, code and population, then rename the population
# column in place.
habitantes <- habitantes %>%
  select(pais, codigo, cantidad)
setnames(habitantes, old = "cantidad", new = "cantHabitantes")

# Inner join on the shared column(s); countries missing from either table
# are dropped.
datos <- merge(x = habitantes, y = confirmados_por_pais)
# Bias number printing toward fixed notation rather than scientific.
options(scipen = 6)

# Confirmed cases per 100 inhabitants.
datos[["porCien"]] <- with(datos, confirmados * 100 / cantHabitantes)
# Sanity check: list every country in confirmados_por_pais that has no row
# with the same `pais` in datos (a NOT EXISTS anti-join run through sqldf),
# then print the result so unmatched names can be inspected.
no_estan <- sqldf("select c.pais
from confirmados_por_pais c
where not exists (select '1'
from datos d
where c.pais = d.pais)")
no_estan
## pais
## 1 crucero Diamond Princess
## 2 crucero MS Zaandam
# Rank countries by confirmed cases per 100 inhabitants, highest first.
# The bare column name is used so arrange() sorts by the column of the table
# it was given rather than by an external vector.
datos_t <- arrange(datos, desc(porCien))

# NOTE(review): this exports `datos` (the table before ranking), not
# `datos_t` — confirm that is intended.
write.csv2(datos, "datos_01.csv", row.names = FALSE, fileEncoding = "UTF-8")

# Keep the 25 top-ranked rows. head() preserves the ranking by construction;
# the previous SQL "LIMIT 25" had no ORDER BY, so the row order was not
# guaranteed by the query itself.
datos <- head(datos_t, 25)
# Lollipop chart of confirmed cases per 100 inhabitants, one row per country
# (ordered by rate), with the rounded rate printed halfway along each stem.
g1 <- ggplot(
  datos,
  aes(x = reorder(pais, porCien), y = porCien, label = confirmados)
) +
  geom_segment(size = 0.08, aes(xend = pais, yend = 0)) +
  coord_flip() +
  geom_point(size = 1, color = "orange") +
  geom_text(
    aes(label = round(porCien, 1)),
    position = position_stack(vjust = .5)
  ) +
  ggtitle(paste0("COVID_19 - Confirmados por cada 100 habitantes - ", today())) +
  ylab("confirmados por cada 100 habitantes") +
  xlab("") +
  labs(caption = "\nFuente: The Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE)") +
  # theme_minimal() is a complete theme: it must come BEFORE the theme()
  # tweaks, otherwise it replaces them (the bold title used to be lost).
  theme_minimal() +
  theme(
    plot.title = element_text(lineheight = 1, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Convert the static ggplot into an interactive plotly widget whose hover
# tooltip shows `confirmados`, and request a horizontal legend at
# x = 0.7, y = 0.
g1 <- layout(
  ggplotly(g1, tooltip = "confirmados"),
  legend = list(orientation = "h", x = 0.7, y = 0)
)

# Print the widget.
g1